It is estimated that as many as 25 percent of individuals that are infected with COVID-19 remain asymptomatic link. Widespread of Covid-19 testing is key to helping community identify COVID-19 infected people, and trace and isolate contacts. Pressure to perform widespread testing is growing as public health experts and ordinary citizens question the safety of reopening schools and busineses across the U.S. without better information about who is infected and at risk of spreading the virus link.
This notebook provides an overview of Covid-19 widespread testing progress in U.S. Daily counts of testing cases in each state across U.S. are acquired from The COVID Tracking Project (CTP). The latest version is always available at https://covidtracking.com/api/v1/states/current.json
Uncomment the lines to install libraries you need. Import required modules:
# pip install plotly==4.7.1
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
import requests as request
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import itertools
from itertools import islice
from collections import deque
states_json = request.get("https://covidtracking.com/api/v1/states/current.json")
states_daily_json = request.get("https://covidtracking.com/api/v1/states/daily.json")
us_json = request.get("https://covidtracking.com/api/v1/us/current.json")
states = pd.DataFrame.from_dict(states_json.json())
states_daily = pd.DataFrame.from_dict(states_daily_json.json())
us = pd.DataFrame.from_dict(us_json.json())
Convert date column to datetimes format
s = "-"
date = [s.join([str(i)[0:4], str(i)[4:6], str(i)[6:]]) for i in states_daily["date"]]
states_daily["date"] = date
To adjust for population size, the Census data for each state were retrieved from US Census Estimates 2019 (https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv).
Please note: state level population data are available for 52 States & Territories.
popu = pd.read_csv(
"https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv",
encoding="latin-1",
)
popu = popu.rename(columns={"NAME": "State"})
# Convert state abbreviations to state names
states_abbr = {
"AK": "Alaska",
"AL": "Alabama",
"AR": "Arkansas",
"AS": "American Samoa",
"AZ": "Arizona",
"CA": "California",
"CO": "Colorado",
"CT": "Connecticut",
"DC": "District of Columbia",
"DE": "Delaware",
"FL": "Florida",
"GA": "Georgia",
"GU": "Guam",
"HI": "Hawaii",
"IA": "Iowa",
"ID": "Idaho",
"IL": "Illinois",
"IN": "Indiana",
"KS": "Kansas",
"KY": "Kentucky",
"LA": "Louisiana",
"MA": "Massachusetts",
"MD": "Maryland",
"ME": "Maine",
"MI": "Michigan",
"MN": "Minnesota",
"MO": "Missouri",
"MP": "Northern Marianas",
"MS": "Mississippi",
"MT": "Montana",
"NC": "North Carolina",
"ND": "North Dakota",
"NE": "Nebraska",
"NH": "New Hampshire",
"NJ": "New Jersey",
"NM": "New Mexico",
"NV": "Nevada",
"NY": "New York",
"OH": "Ohio",
"OK": "Oklahoma",
"OR": "Oregon",
"PA": "Pennsylvania",
"PR": "Puerto Rico Commonwealth",
"RI": "Rhode Island",
"SC": "South Carolina",
"SD": "South Dakota",
"TN": "Tennessee",
"TX": "Texas",
"UT": "Utah",
"VA": "Virginia",
"VI": "Virgin Islands",
"VT": "Vermont",
"WA": "Washington",
"WI": "Wisconsin",
"WV": "West Virginia",
"WY": "Wyoming",
}
states_daily["State"] = states_daily["state"].map(states_abbr)
# Since there were no data in the early dates except Washington and New York, we need to fill those rows to make sure all the states are there for every date.
states = states_daily.State.unique()
dates = states_daily.date.unique()
for date in dates:
daily = states_daily.loc[states_daily["date"] == date]
daily_state = daily["State"].tolist()
for i in states:
if i not in daily_state:
states_daily = states_daily.append(
{
"State": i,
"date": date,
"positive": 0,
"positiveIncrease": 0,
"totalTestResults": 0,
"totalTestResultsIncrease": 0,
},
ignore_index=True,
)
states_daily["TotalTestPerCase"] = (
states_daily["totalTestResults"] / states_daily["positive"]
)
states_daily["DailyTestPerCase"] = (
states_daily["totalTestResultsIncrease"] / states_daily["positiveIncrease"]
)
states_popu = pd.merge(
states_daily, popu[["State", "POPESTIMATE2019"]], on="State", how="inner"
)
states_popu["PositivePer10000"] = (
states_popu["positive"] / states_popu["POPESTIMATE2019"] * 10000
)
states_popu["PositiveIncreasePer1M"] = (
states_popu["positiveIncrease"] / states_popu["POPESTIMATE2019"] * 1000000
)
states_popu["TotalTestPer1000"] = (
states_popu["totalTestResults"] / states_popu["POPESTIMATE2019"] * 1000
)
states_popu["TotalTestIncreasePer1000"] = (
states_popu["totalTestResultsIncrease"] / states_popu["POPESTIMATE2019"] * 1000
)
We monitored the total number of testing starting from March 1 2020. Most of the states have limited data before then.
fig = px.bar(
states_daily.sort_values(by=["date", "State"]).iloc[2184:],
y="totalTestResults",
x="State",
animation_frame="date",
animation_group="State",
color="State",
range_x=[-0.5, 56],
range_y=[1, 6000000],
log_y=True,
)
fig.update_layout(
title="Total COVID-19 tests in 56 U.S. States & Territories",
xaxis_title="",
yaxis_title="Total tests",
autosize=True,
width=1000,
height=650,
showlegend=False,
margin=dict(l=5, r=5),
yaxis_type="log",
)
fig.update_xaxes(ticks="outside")
fig.update_xaxes(tickangle=-30, tickfont=dict(size=9))
fig.update_layout(sliders=[dict(transition=dict(duration=10), len=0.85)])
fig.show("notebook")
We monitored the testing coverage starting from March 21, 2020, since most of the states have less than 5 per 1,000 residents tested before then.
fig = px.bar(
states_popu.sort_values(by=["date", "State"]).iloc[3068:],
y="TotalTestPer1000",
x="State",
animation_frame="date",
animation_group="State",
color="State",
range_x=[-0.5, 52],
range_y=[0, 130],
)
fig.update_layout(
title="Total COVID-19 tests per 1,000 residents in 52 U.S. States & Territories",
xaxis_title="",
yaxis_title="Total tests per 1,000 residents",
autosize=True,
width=1000,
height=650,
showlegend=False,
margin=dict(l=5, r=5),
)
fig.update_layout(sliders=[dict(transition=dict(duration=10), len=0.85)])
fig.update_xaxes(ticks="outside")
fig.update_xaxes(tickangle=-30, tickfont=dict(size=9))
fig.show("notebook")
df = states_daily.pivot_table(
index="date", columns="State", values="totalTestResultsIncrease"
)
df = df.fillna(0)
datetime_index = pd.DatetimeIndex(df.index)
df.set_index(datetime_index, inplace=True)
df = df.sort_values(by="date")
# Function to calculate 5 days moving average
def five_moving_average(data):
it = iter(data)
d = deque(islice(it, 5))
divisor = float(5)
s = sum(d)
yield s / divisor
for elem in it:
s += elem - d.popleft()
d.append(elem)
yield s / divisor
# Calculate 5 days moving average table
average_table = df[2:-2].copy()
for col in df.columns:
average_table[col] = list(five_moving_average(list(df[col])))
average_table_t = average_table.T
fig = px.imshow(
average_table_t.values.tolist(),
labels=dict(x="Date", y="State"),
y=average_table_t.index.tolist(),
x=average_table_t.columns.tolist(),
color_continuous_scale=px.colors.sequential.dense,
)
fig.update_layout(
title="Daily COVID-19 tests (5-day moving average) in 56 U.S. States & Territories",
autosize=True,
xaxis_title="",
yaxis_title="",
width=900,
height=1000,
margin=dict(l=30, r=0),
)
fig.update_layout(
coloraxis_colorbar=dict(
title="Average daily tests",
thicknessmode="pixels",
thickness=50,
lenmode="pixels",
len=350,
yanchor="top",
y=0.75,
xanchor="left",
x=-0.25,
ticks="outside",
)
)
fig.update_xaxes(ticks="outside")
fig.update_yaxes(tickfont=dict(size=9), side="right", ticks="outside")
fig.show("notebook")
df = states_popu.pivot_table(
index="date", columns="State", values="TotalTestIncreasePer1000"
)
df = df.fillna(0)
# Found error in original daily state data thaat the `totalTestResultsIncrease` on 2020-05-06
# for Puerto Rico Commonwealth is -9269.0. However, increase value shouldn't be negative.
# Replace this value with 0.
df[df < 0] = 0
datetime_index = pd.DatetimeIndex(df.index)
df.set_index(datetime_index, inplace=True)
df = df.sort_values(by="date")
# Calculate 5 days moving average table
average_table = df[2:-2].copy()
for col in df.columns:
average_table[col] = list(five_moving_average(list(df[col])))
average_table_t = average_table.T
fig = px.imshow(
average_table_t.values.tolist(),
labels=dict(x="Date", y="State"),
y=average_table_t.index.tolist(),
x=average_table_t.columns.tolist(),
color_continuous_scale=px.colors.sequential.dense,
)
fig.update_layout(
title="Daily COVID-19 tests (5-day moving average) per 1,000 residents in 52 U.S. States & Territories",
autosize=True,
xaxis_title="",
yaxis_title="",
width=900,
height=1000,
margin=dict(l=30, r=0),
)
fig.update_layout(
coloraxis_colorbar=dict(
title="Average daily tests<br>per 1,000 residents",
thicknessmode="pixels",
thickness=50,
lenmode="pixels",
len=350,
yanchor="top",
y=0.75,
xanchor="left",
x=-0.25,
ticks="outside",
)
)
fig.update_xaxes(ticks="outside")
fig.update_yaxes(tickfont=dict(size=9), side="right", ticks="outside")
fig.show("notebook")
The number of tests per confirmed case help us understand the true spread of the virus by accounting for the fact that a smaller outbreak requires less testing.
fig = px.bar(
states_daily.loc[states_daily.positive != 0]
.sort_values(by=["date", "State"])
.iloc[2179:],
y="TotalTestPerCase",
x="State",
animation_frame="date",
animation_group="State",
color="State",
range_x=[-0.5, 55],
range_y=[0, 240],
)
fig.update_layout(
title="Total COVID-19 tests per confirmed case in 56 U.S. States & Territories",
xaxis_title="",
yaxis_title="Total tests per confirmed case",
autosize=True,
width=1000,
height=650,
showlegend=False,
margin=dict(l=5, r=5),
)
fig.update_layout(sliders=[dict(transition=dict(duration=10), len=0.85)])
fig.update_xaxes(ticks="outside")
fig.update_xaxes(tickangle=-30, tickfont=dict(size=9))
fig.show("notebook")
fig = px.bar(
states_daily.loc[states_daily.positiveIncrease != 0]
.sort_values(by=["date", "State"])
.iloc[1960:],
y="DailyTestPerCase",
x="State",
animation_frame="date",
animation_group="State",
color="State",
range_x=[-0.5, 55],
range_y=[1, 2300],
log_y=True,
)
fig.update_layout(
title="Daily COVID-19 tests per newly confirmed case in 56 U.S. States & Territories",
xaxis_title="",
yaxis_title="Daily tests per newly confirmed case",
autosize=True,
width=1000,
height=650,
showlegend=False,
margin=dict(l=5, r=5),
yaxis_type="log",
)
fig.update_layout(sliders=[dict(transition=dict(duration=10), len=0.85)])
fig.update_xaxes(ticks="outside")
fig.update_xaxes(tickangle=-30, tickfont=dict(size=9))
fig.show("notebook")
The chart below shows the number of total tests against the number of total confirmed cases. The dash lines show the points on the chart where the number of tests are a fixed number of times larger than the number of confirmed cases – 1, 5, 10, 20, 50, 100 times larger.
fig = px.scatter(
states_daily.sort_values(by=["date", "State"]).iloc[2184:],
x="positive",
y="totalTestResults",
animation_frame="date",
animation_group="State",
color="State",
hover_name="State",
log_y=True,
log_x=True,
size_max=40,
range_x=[10, 500000],
range_y=[100, 3000000],
)
fig.update_traces(
marker=dict(size=9, line=dict(width=1, color="DarkSlateGrey")),
selector=dict(mode="markers"),
)
fig.update_layout(
title="Total COVID-19 tests conducted vs. Confirmed cases in 56 U.S. States & Territories",
xaxis_title="Total confirmed cases",
yaxis_title="Total tests",
autosize=True,
width=1000,
height=850,
margin=dict(l=5, r=40),
legend=dict(x=0, y=-0.32),
legend_orientation="h",
)
fig.update_layout(sliders=[dict(transition=dict(duration=10), len=0.85)])
fig.update_layout(
shapes=[
dict(
type="line",
yref="y",
y0=100,
y1=500000,
xref="x",
x0=100,
x1=500000,
line=dict(width=0.7, dash="dash"),
),
dict(
type="line",
yref="y",
y0=100,
y1=2500000,
xref="x",
x0=20,
x1=500000,
line=dict(width=0.7, dash="dash"),
),
dict(
type="line",
yref="y",
y0=100,
y1=3000000,
xref="x",
x0=10,
x1=300000,
line=dict(width=0.7, dash="dash"),
),
dict(
type="line",
yref="y",
y0=200,
y1=3000000,
xref="x",
x0=10,
x1=150000,
line=dict(width=0.7, dash="dash"),
),
dict(
type="line",
yref="y",
y0=1000,
y1=3000000,
xref="x",
x0=10,
x1=30000,
line=dict(width=0.7, dash="dash"),
),
dict(
type="line",
yref="y",
y0=500,
y1=3000000,
xref="x",
x0=10,
x1=60000,
line=dict(width=0.7, dash="dash"),
),
]
)
fig.update_layout(
annotations=[
dict(
xref="x",
yref="y",
x=2.35,
y=2.45,
text="1x",
textangle=-30,
font=dict(family="Arial", size=13, color="black"),
showarrow=False,
),
dict(
xref="x",
yref="y",
x=2.2,
y=2.95,
text="5x",
textangle=-30,
font=dict(family="Arial", size=13, color="black"),
showarrow=False,
),
dict(
xref="x",
yref="y",
x=2.1,
y=3.15,
text="10x",
textangle=-30,
font=dict(family="Arial", size=13, color="black"),
showarrow=False,
),
dict(
xref="x",
yref="y",
x=2.02,
y=3.37,
text="20x",
textangle=-30,
font=dict(family="Arial", size=13, color="black"),
showarrow=False,
),
dict(
xref="x",
yref="y",
x=1.82,
y=3.86,
text="100x",
textangle=-30,
font=dict(family="Arial", size=13, color="black"),
showarrow=False,
),
dict(
xref="x",
yref="y",
x=1.9,
y=3.65,
text="50x",
textangle=-30,
font=dict(family="Arial", size=13, color="black"),
showarrow=False,
),
]
)
fig.show("notebook")
Lots of COVID-19 studies estimate the degree to which countries’ confirmed cases may underestimate total cases by comparing with the case fatality rate observed in large studies in China and South Korea. The intuition behind these estimates is that where the number of confirmed cases looks low against the number of deaths, this is a clear indication that the true number of cases is likely to be much, much higher. But the fundamental reason for this is the limited extent of testing.
The rate of tests per case gives us another way of approaching the same question, by looking at the extent of testing relative to the number of cases directly. And we apply this analysis to the U.S. states data from CTP.
The gap between U.S. states in terms of total tests for each confirmed case varies largely. Most states fall between 5x to 50x.
When the ratio of tests vs. confirmed cases is large, the reported number of infection is more reliable.